#!/usr/bin/env python
"""Classes for exporting data from AFF4 to the rest of the world.

Exporters defined here convert various complex RDFValues to simple RDFValues
(without repeated fields, without recursive field definitions) that can
easily be written to a relational database or just to a set of files.
"""
from __future__ import absolute_import
from __future__ import division

from __future__ import unicode_literals

import hashlib
import logging
import time


from future.builtins import str
from future.utils import iteritems
from future.utils import iterkeys
from future.utils import itervalues
from future.utils import with_metaclass
from typing import Any
from typing import Type

from grr_response_core.lib import rdfvalue
from grr_response_core.lib import registry
from grr_response_core.lib import utils
from grr_response_core.lib.rdfvalues import client as rdf_client
from grr_response_core.lib.rdfvalues import client_fs as rdf_client_fs
from grr_response_core.lib.rdfvalues import client_network as rdf_client_network
from grr_response_core.lib.rdfvalues import file_finder as rdf_file_finder
from grr_response_core.lib.rdfvalues import flows as rdf_flows
from grr_response_core.lib.rdfvalues import memory as rdf_memory
from grr_response_core.lib.rdfvalues import osquery as rdf_osquery
from grr_response_core.lib.rdfvalues import paths as rdf_paths
from grr_response_core.lib.rdfvalues import protodict as rdf_protodict
from grr_response_core.lib.rdfvalues import structs as rdf_structs
from grr_response_core.lib.util import collection
from grr_response_core.lib.util import compatibility
from grr_response_core.lib.util import precondition
from grr_response_proto import export_pb2
from grr_response_server import aff4
from grr_response_server import data_store
from grr_response_server import data_store_utils
from grr_response_server import file_store
from grr_response_server import flow
from grr_response_server import sequential_collection
from grr_response_server.aff4_objects import aff4_grr
from grr_response_server.aff4_objects import filestore
from grr_response_server.check_lib import checks
from grr_response_server.databases import db
from grr_response_server.flows.general import collectors as flow_collectors
from grr_response_server.hunts import results as hunt_results

try:
  # pylint: disable=g-import-not-at-top
  from verify_sigs import auth_data
  from verify_sigs.asn1 import dn
  # pylint: enable=g-import-not-at-top
except ImportError:
  pass


class Error(Exception):
  """Base class for errors generated by export converters."""


class NoConverterFound(Error):
  """Raised when no converter is found for a particular value."""


class ExportError(Error):
  """Unspecified error raised while exporting."""


class ExportOptions(rdf_structs.RDFProtoStruct):
  """RDF value wrapping the export_pb2.ExportOptions protobuf."""
  protobuf = export_pb2.ExportOptions


class ExportedMetadata(rdf_structs.RDFProtoStruct):
  """ExportMetadata RDF value.

  Wraps the export_pb2.ExportedMetadata protobuf. Instances always carry a
  timestamp: if none is set after construction, the current time is used.
  """

  protobuf = export_pb2.ExportedMetadata
  rdf_deps = [
      rdf_client.ClientURN,
      rdf_client.HardwareInfo,
      rdfvalue.RDFDatetime,
      rdfvalue.RDFURN,
      rdfvalue.SessionID,
  ]

  def __init__(self, initializer=None, age=None, payload=None, **kwarg):
    """Initializes the metadata and fills in a default timestamp.

    Args:
      initializer: Forwarded to the RDFProtoStruct constructor.
      age: Forwarded to the RDFProtoStruct constructor.
      payload: Accepted for interface compatibility; not used here and not
        forwarded to the parent constructor.
      **kwarg: Field values forwarded to the RDFProtoStruct constructor.
    """
    super(ExportedMetadata, self).__init__(
        initializer=initializer, age=age, **kwarg)

    # Default the timestamp to "now" when the caller did not provide one.
    if not self.timestamp:
      self.timestamp = rdfvalue.RDFDatetime.Now()


class ExportedClient(rdf_structs.RDFProtoStruct):
  """RDF value wrapping the export_pb2.ExportedClient protobuf."""
  protobuf = export_pb2.ExportedClient
  rdf_deps = [
      ExportedMetadata,
  ]


class ExportedFile(rdf_structs.RDFProtoStruct):
  """RDF value wrapping the export_pb2.ExportedFile protobuf."""
  protobuf = export_pb2.ExportedFile
  rdf_deps = [
      ExportedMetadata,
      rdfvalue.RDFDatetimeSeconds,
      rdfvalue.RDFURN,
      rdf_client_fs.StatMode,
  ]


class ExportedRegistryKey(rdf_structs.RDFProtoStruct):
  """RDF value wrapping the export_pb2.ExportedRegistryKey protobuf."""
  protobuf = export_pb2.ExportedRegistryKey
  rdf_deps = [
      ExportedMetadata,
      rdfvalue.RDFDatetimeSeconds,
      rdfvalue.RDFURN,
  ]


class ExportedProcess(rdf_structs.RDFProtoStruct):
  """RDF value wrapping the export_pb2.ExportedProcess protobuf."""
  protobuf = export_pb2.ExportedProcess
  rdf_deps = [
      ExportedMetadata,
  ]


class ExportedNetworkConnection(rdf_structs.RDFProtoStruct):
  """RDF value wrapping the export_pb2.ExportedNetworkConnection protobuf."""
  protobuf = export_pb2.ExportedNetworkConnection
  rdf_deps = [
      ExportedMetadata,
      rdf_client_network.NetworkEndpoint,
  ]


class ExportedDNSClientConfiguration(rdf_structs.RDFProtoStruct):
  """RDF value wrapping export_pb2.ExportedDNSClientConfiguration."""
  protobuf = export_pb2.ExportedDNSClientConfiguration
  rdf_deps = [
      ExportedMetadata,
  ]


class ExportedOpenFile(rdf_structs.RDFProtoStruct):
  """RDF value wrapping the export_pb2.ExportedOpenFile protobuf."""
  protobuf = export_pb2.ExportedOpenFile
  rdf_deps = [
      ExportedMetadata,
  ]


class ExportedNetworkInterface(rdf_structs.RDFProtoStruct):
  """RDF value wrapping the export_pb2.ExportedNetworkInterface protobuf."""
  protobuf = export_pb2.ExportedNetworkInterface
  rdf_deps = [
      ExportedMetadata,
  ]


class ExportedFileStoreHash(rdf_structs.RDFProtoStruct):
  """RDF value wrapping the export_pb2.ExportedFileStoreHash protobuf."""
  protobuf = export_pb2.ExportedFileStoreHash
  rdf_deps = [
      ExportedMetadata,
      rdfvalue.RDFURN,
  ]


class ExportedAnomaly(rdf_structs.RDFProtoStruct):
  """RDF value wrapping the export_pb2.ExportedAnomaly protobuf."""
  protobuf = export_pb2.ExportedAnomaly


class ExportedCheckResult(rdf_structs.RDFProtoStruct):
  """RDF value wrapping the export_pb2.ExportedCheckResult protobuf."""
  protobuf = export_pb2.ExportedCheckResult
  rdf_deps = [
      ExportedAnomaly,
      ExportedMetadata,
  ]


class ExportedMatch(rdf_structs.RDFProtoStruct):
  """RDF value wrapping the export_pb2.ExportedMatch protobuf."""
  protobuf = export_pb2.ExportedMatch
  rdf_deps = [
      ExportedMetadata,
      rdfvalue.RDFURN,
  ]


class ExportedBytes(rdf_structs.RDFProtoStruct):
  """RDF value wrapping the export_pb2.ExportedBytes protobuf."""
  protobuf = export_pb2.ExportedBytes
  rdf_deps = [
      ExportedMetadata,
  ]


class ExportedString(rdf_structs.RDFProtoStruct):
  """RDF value wrapping the export_pb2.ExportedString protobuf."""
  protobuf = export_pb2.ExportedString
  rdf_deps = [
      ExportedMetadata,
  ]


class ExportedDictItem(rdf_structs.RDFProtoStruct):
  """RDF value wrapping the export_pb2.ExportedDictItem protobuf."""
  protobuf = export_pb2.ExportedDictItem
  rdf_deps = [ExportedMetadata]


class ExportedArtifactFilesDownloaderResult(rdf_structs.RDFProtoStruct):
  """RDF value wrapping export_pb2.ExportedArtifactFilesDownloaderResult."""
  protobuf = export_pb2.ExportedArtifactFilesDownloaderResult
  rdf_deps = [
      ExportedFile,
      ExportedMetadata,
      ExportedRegistryKey,
  ]


class ExportedSoftwarePackage(rdf_structs.RDFProtoStruct):
  """RDF value wrapping the export_pb2.ExportedSoftwarePackage protobuf."""
  protobuf = export_pb2.ExportedSoftwarePackage
  rdf_deps = [
      ExportedMetadata,
  ]


class ExportedYaraProcessScanMatch(rdf_structs.RDFProtoStruct):
  """RDF value wrapping export_pb2.ExportedYaraProcessScanMatch."""
  protobuf = export_pb2.ExportedYaraProcessScanMatch
  rdf_deps = [ExportedProcess, ExportedMetadata]


class ExportConverter(with_metaclass(registry.MetaclassRegistry, object)):
  """Base ExportConverter class.

  ExportConverters are used to convert RDFValues to export-friendly RDFValues.
  "Export-friendly" means 2 things:
    * Flat structure
    * No repeated fields (i.e. lists)

  In order to use ExportConverters, users have to use ConvertValues.
  These methods will look up all the available ExportConverters descendants
  and will choose the ones that have input_rdf_type attribute equal to the
  type of the values being converted. It's ok to have multiple converters with
  the same input_rdf_type value. They will be applied sequentially and their
  cumulative results will be returned.
  """

  # Type of values that this converter accepts.
  input_rdf_type = None

  # Cache used for GetConvertersByValue() lookups.
  converters_cache = {}

  def __init__(self, options=None):
    """Initializes the converter with export settings.

    Args:
      options: ExportOptions value, which contains settings that may or may
        not affect this converter's behavior. A falsy value (including None)
        is replaced with a default-constructed ExportOptions.
    """
    super(ExportConverter, self).__init__()
    if not options:
      options = ExportOptions()
    self.options = options

  def Convert(self, metadata, value, token=None):
    """Converts given RDFValue to other RDFValues.

    This base implementation is abstract; subclasses must override it.

    Metadata object is provided by the caller. It contains basic information
    about where the value is coming from (i.e. client_urn, session_id, etc)
    as well as timestamps corresponding to when data was generated and
    exported.

    ExportConverter should use the metadata when constructing export-friendly
    RDFValues.

    Args:
      metadata: ExportedMetadata to be used for conversion.
      value: RDFValue to be converted.
      token: Security token.

    Yields:
      Resulting RDFValues. Empty list is a valid result and means that
      conversion wasn't possible. Resulting RDFValues may be of different
      types.

    Raises:
      NotImplementedError: always, in this base class.
    """
    raise NotImplementedError()

  def BatchConvert(self, metadata_value_pairs, token=None):
    """Converts a batch of RDFValues, one pair at a time.

    Default, non-optimized implementation: every (metadata, value) pair is
    handed to Convert() and its results are re-yielded in order. Subclasses
    are expected to provide optimized implementations where batching helps.

    The metadata object is provided by the caller. It contains basic
    information about where the value is coming from (i.e. client_urn,
    session_id, etc) as well as timestamps corresponding to when data was
    generated and exported.

    Args:
      metadata_value_pairs: a list or a generator of tuples (metadata, value),
        where metadata is ExportedMetadata to be used for conversion and value
        is an RDFValue to be converted.
      token: Security token.

    Yields:
      Resulting RDFValues. Yielding nothing is a valid outcome and means that
      conversion wasn't possible. Resulting RDFValues may be of different
      types.
    """
    for pair_metadata, pair_value in metadata_value_pairs:
      converted = self.Convert(pair_metadata, pair_value, token)
      for converted_value in converted:
        yield converted_value

  @staticmethod
  def GetConvertersByClass(value_cls):
    """Returns converter classes whose input_rdf_type equals value_cls.

    Lookups are memoized in ExportConverter.converters_cache. When no
    registered converter matches, DataAgnosticExportConverter is used as the
    only (fallback) converter.

    Args:
      value_cls: Class of the values that need to be converted.

    Returns:
      A list of ExportConverter subclasses.
    """
    cache = ExportConverter.converters_cache
    if value_cls in cache:
      return cache[value_cls]

    matching = []
    for converter_cls in itervalues(ExportConverter.classes):
      if converter_cls.input_rdf_type == value_cls:
        matching.append(converter_cls)

    # Fall back to the generic flattening converter when nothing matches.
    cache[value_cls] = matching or [DataAgnosticExportConverter]
    return cache[value_cls]

  @staticmethod
  def GetConvertersByValue(value):
    """Returns converter classes applicable to the class of the given value."""
    value_cls = value.__class__
    return ExportConverter.GetConvertersByClass(value_cls)


class AutoExportedProtoStruct(rdf_structs.RDFProtoStruct):
  """Special base class for auto-exported (auto-generated, flat) values."""


class DataAgnosticExportConverter(ExportConverter):
  """Export converter that yields flattened versions of passed values.

  NOTE: DataAgnosticExportConverter discards complex types: repeated
  fields and nested messages. Only the primitive types (including enums)
  are preserved.
  """

  # Cache used for generated classes.
  classes_cache = {}

  def ExportedClassNameForValue(self, value):
    """Returns the name of the flattened class generated for value's class."""
    source_class_name = value.__class__.__name__
    return utils.SmartStr("AutoExported" + source_class_name)

  def MakeFlatRDFClass(self, value):
    """Generates flattened RDFValue class definition for the given value.

    The generated class derives from AutoExportedProtoStruct, has an
    ExportedMetadata "metadata" field at field number 1 and a copy of every
    primitive field of `value` (field numbers shifted by one). Fields whose
    descriptors are not of the listed primitive kinds are left out.

    Args:
      value: RDFValue whose type descriptors are used as the template.

    Returns:
      A newly created class with a Flatten(metadata, value_to_flatten) method.
    """

    def Flatten(self, metadata, value_to_flatten):
      """Copies metadata and all set, known fields into this flat value."""
      if metadata:
        self.metadata = metadata

      for desc in value_to_flatten.type_infos:
        # 'metadata' is reserved for the ExportedMetadata set above.
        if desc.name == "metadata":
          continue
        # Copy only fields that exist on the flat class and are set on the
        # source value.
        if hasattr(self, desc.name) and value_to_flatten.HasField(desc.name):
          setattr(self, desc.name, getattr(value_to_flatten, desc.name))

    descriptors = []
    enums = {}

    # Metadata is always the first field of exported data.
    descriptors.append(
        rdf_structs.ProtoEmbedded(
            name="metadata", field_number=1, nested=ExportedMetadata))

    for number, desc in sorted(iteritems(value.type_infos_by_field_number)):
      # Name 'metadata' is reserved to store ExportedMetadata value.
      if desc.name == "metadata":
        logging.debug("Ignoring 'metadata' field in %s.",
                      value.__class__.__name__)
        continue

      # Copy descriptors for primitive values as-is, just make sure their
      # field number is correct.
      if isinstance(desc, (rdf_structs.ProtoBinary, rdf_structs.ProtoString,
                           rdf_structs.ProtoUnsignedInteger,
                           rdf_structs.ProtoRDFValue, rdf_structs.ProtoEnum)):
        # Incrementing field number by 1, as 1 is always occupied by metadata.
        descriptors.append(desc.Copy(field_number=number + 1))

      # ProtoBoolean descriptors are excluded from enum-container handling.
      if (isinstance(desc, rdf_structs.ProtoEnum) and
          not isinstance(desc, rdf_structs.ProtoBoolean)):
        # Attach the enum container to the class for easy reference:
        enums[desc.enum_name] = desc.enum_container

    # Create the class as late as possible. This will modify a
    # metaclass registry, we need to make sure there are no problems.
    output_class = type(
        self.ExportedClassNameForValue(value), (AutoExportedProtoStruct,),
        dict(Flatten=Flatten))

    for descriptor in descriptors:
      output_class.AddDescriptor(descriptor)

    for name, container in iteritems(enums):
      setattr(output_class, name, container)

    return output_class

  def Convert(self, metadata, value, token=None):
    """Yields a single flattened copy of the given value.

    The flat class is generated on first use and memoized in
    DataAgnosticExportConverter.classes_cache under its generated name.

    Args:
      metadata: ExportedMetadata to attach to the flattened value.
      value: RDFValue to flatten.
      token: Security token (unused).

    Yields:
      One instance of the generated flat class.
    """
    cache = DataAgnosticExportConverter.classes_cache
    flat_class_name = self.ExportedClassNameForValue(value)
    if flat_class_name not in cache:
      cache[flat_class_name] = self.MakeFlatRDFClass(value)

    flattened = cache[flat_class_name]()
    flattened.Flatten(metadata, value)
    yield flattened

  def BatchConvert(self, metadata_value_pairs, token=None):
    """Flattens every (metadata, value) pair by delegating to Convert()."""
    for pair_metadata, pair_value in metadata_value_pairs:
      flattened_values = self.Convert(pair_metadata, pair_value, token=token)
      for flattened in flattened_values:
        yield flattened


class StatEntryToExportedFileConverter(ExportConverter):
  """Converts StatEntry to ExportedFile."""

  input_rdf_type = rdf_client_fs.StatEntry

  MAX_CONTENT_SIZE = 1024 * 64

  @staticmethod
  def ParseSignedData(signed_data, result):
    """Parses signed certificate data and updates result rdfvalue.

    Best-effort: if the optional verify_sigs package could not be imported,
    this is a no-op; any exception raised while parsing is logged and
    swallowed, leaving `result` with whatever fields were set up to that point.

    Args:
      signed_data: Object whose `certificate` attribute is passed to
        auth_data.AuthData.
      result: Value whose cert_* attributes are filled in.
    """
    try:
      auth_data
    except NameError:
      # Verify_sigs is not available so we can't parse signatures. If you want
      # this functionality, please install the verify-sigs package:
      # https://github.com/anthrotype/verify-sigs
      # TODO(amoser): Make verify-sigs a pip package and add a dependency.
      return

    try:
      try:
        auth = auth_data.AuthData(signed_data.certificate)
      except Exception as e:  # pylint: disable=broad-except
        # If we failed to parse the certificate, we want the user to know it.
        result.cert_hasher_name = "Error parsing certificate: %s" % str(e)
        # Re-raised so that the outer handler logs it and stops processing.
        raise

      result.cert_hasher_name = auth.digest_algorithm().name
      result.cert_program_name = str(auth.program_name)
      result.cert_program_url = str(auth.program_url)
      result.cert_signing_id = str(auth.signing_cert_id)

      try:
        # This fills in auth.cert_chain_head. We ignore Asn1Error because
        # we want to extract as much data as possible, no matter if the
        # certificate has expired or not.
        auth.ValidateCertChains(time.gmtime())
      except auth_data.Asn1Error:
        pass
      result.cert_chain_head_issuer = str(auth.cert_chain_head[2])

      if auth.has_countersignature:
        result.cert_countersignature_chain_head_issuer = str(
            auth.counter_chain_head[2])

      # Summarize every certificate as a dict; the list of dicts is stored in
      # stringified form.
      certs = []
      for (issuer, serial), cert in iteritems(auth.certificates):
        subject = cert[0][0]["subject"]
        subject_dn = str(dn.DistinguishedName.TraverseRdn(subject[0]))
        not_before = cert[0][0]["validity"]["notBefore"]
        not_after = cert[0][0]["validity"]["notAfter"]
        not_before_time = not_before.ToPythonEpochTime()
        not_after_time = not_after.ToPythonEpochTime()
        not_before_time_str = time.asctime(time.gmtime(not_before_time))
        not_after_time_str = time.asctime(time.gmtime(not_after_time))

        certs.append(
            dict(
                issuer=issuer,
                serial=serial,
                subject=subject_dn,
                not_before_time=not_before_time_str,
                not_after_time=not_after_time_str))
      result.cert_certificates = str(certs)

    # Verify_sigs library can basically throw all kinds of exceptions so
    # we have to use broad except here.
    except Exception as e:  # pylint: disable=broad-except
      logging.error(e)

  @staticmethod
  def ParseFileHash(hash_obj, result):
    """Copies the hashes set on a Hash rdfvalue into ExportedFile's fields.

    Args:
      hash_obj: Hash rdfvalue to read from. Only fields that are set are used.
      result: Value receiving the stringified hashes and, when signed data is
        present, the parsed certificate details.
    """
    # (field on hash_obj, field on result) pairs, processed in this order.
    field_mapping = (
        ("md5", "hash_md5"),
        ("sha1", "hash_sha1"),
        ("sha256", "hash_sha256"),
        ("pecoff_md5", "pecoff_hash_md5"),
        ("pecoff_sha1", "pecoff_hash_sha1"),
    )
    for source_field, target_field in field_mapping:
      if hash_obj.HasField(source_field):
        setattr(result, target_field, str(getattr(hash_obj, source_field)))

    if hash_obj.HasField("signed_data"):
      # Only the first signed_data entry is parsed.
      first_signed_data = hash_obj.signed_data[0]
      StatEntryToExportedFileConverter.ParseSignedData(first_signed_data,
                                                       result)

  def Convert(self, metadata, stat_entry, token=None):
    """Converts a single StatEntry to ExportedFile via BatchConvert.

    Nothing is produced if the StatEntry corresponds to a registry entry and
    not to a file.

    Args:
      metadata: ExportedMetadata to be used for conversion.
      stat_entry: StatEntry to be converted.
      token: Security token.

    Returns:
      Generator with resulting RDFValues. It is empty if the StatEntry
      corresponds to a registry entry and not to a file.
    """
    single_pair_batch = [(metadata, stat_entry)]
    return self.BatchConvert(single_pair_batch, token=token)

  def _RemoveRegistryKeys(self, metadata_value_pairs):
    """Returns a list of the pairs whose pathspec is not of REGISTRY type."""
    registry_type = rdf_paths.PathSpec.PathType.REGISTRY
    return [
        (metadata, stat_entry)
        for metadata, stat_entry in metadata_value_pairs
        if stat_entry.pathspec.pathtype != registry_type
    ]

  def _OpenFilesForRead(self, metadata_value_pairs, token):
    """Opens the AFF4 objects of all given pairs with a single MultiOpen.

    Args:
      metadata_value_pairs: iterable of (metadata, value) tuples; each value
        must provide AFF4Path().
      token: Security token.

    Returns:
      Dict mapping the URN of every opened object to the object itself.
    """
    urns_to_open = []
    for metadata, value in metadata_value_pairs:
      urns_to_open.append(value.AFF4Path(metadata.client_urn))

    opened = aff4.FACTORY.MultiOpen(urns_to_open, mode="r", token=token)
    return {fd.urn: fd for fd in opened}

  def _ExportFileContent(self, aff4_object, result):
    """Adds up to MAX_CONTENT_SIZE bytes of file content and its SHA-256.

    Does nothing unless the export_files_contents option is set. Read failures
    (IOError, AttributeError) are logged as warnings and otherwise ignored.

    Args:
      aff4_object: Opened AFF4 object to read the content from.
      result: Value whose content and content_sha256 fields are filled in.
    """
    if not self.options.export_files_contents:
      return

    try:
      result.content = aff4_object.Read(self.MAX_CONTENT_SIZE)
      content_digest = hashlib.sha256(result.content)
      result.content_sha256 = content_digest.hexdigest()
    except (IOError, AttributeError) as e:
      logging.warning("Can't read content of %s: %s", aff4_object.urn, e)

  def _CreateExportedFile(self, metadata, stat_entry):
    """Builds an ExportedFile from the stat fields of a StatEntry.

    Args:
      metadata: ExportedMetadata to attach; its client_urn is used to build
        the file URN.
      stat_entry: StatEntry providing the pathspec and the st_* values.

    Returns:
      ExportedFile without any content or hash information.
    """
    file_urn = stat_entry.AFF4Path(metadata.client_urn)
    file_basename = stat_entry.pathspec.Basename()

    exported_file = ExportedFile(
        metadata=metadata,
        urn=file_urn,
        basename=file_basename,
        st_mode=stat_entry.st_mode,
        st_ino=stat_entry.st_ino,
        st_dev=stat_entry.st_dev,
        st_nlink=stat_entry.st_nlink,
        st_uid=stat_entry.st_uid,
        st_gid=stat_entry.st_gid,
        st_size=stat_entry.st_size,
        st_atime=stat_entry.st_atime,
        st_mtime=stat_entry.st_mtime,
        st_ctime=stat_entry.st_ctime,
        st_blocks=stat_entry.st_blocks,
        st_blksize=stat_entry.st_blksize,
        st_rdev=stat_entry.st_rdev,
        symlink=stat_entry.symlink)
    return exported_file

  def _BatchConvertLegacy(self, metadata_value_pairs, token=None):
    """Converts StatEntries to ExportedFiles using AFF4 for file contents.

    Args:
      metadata_value_pairs: iterable of (ExportedMetadata, StatEntry) tuples.
      token: Security token used to open the AFF4 objects.

    Yields:
      One ExportedFile per non-registry StatEntry.
    """
    file_pairs = self._RemoveRegistryKeys(metadata_value_pairs)

    # Files are only opened when their contents are going to be exported.
    if self.options.export_files_contents:
      opened_by_urn = self._OpenFilesForRead(file_pairs, token=token)
    else:
      opened_by_urn = None

    for metadata, stat_entry in file_pairs:
      exported_file = self._CreateExportedFile(metadata, stat_entry)

      if self.options.export_files_contents:
        try:
          file_urn = stat_entry.AFF4Path(metadata.client_urn)
          self._ExportFileContent(opened_by_urn[file_urn], exported_file)
        except KeyError:
          # Files that could not be opened are exported without content.
          pass

      yield exported_file

  _BATCH_SIZE = 5000

  def _BatchConvertRelational(self, metadata_value_pairs):
    """Converts StatEntries to ExportedFiles using the file store for contents.

    Non-registry pairs are processed in batches of _BATCH_SIZE. When the
    export_files_contents option is set, the data of each batch is streamed
    from the file store and truncated to MAX_CONTENT_SIZE bytes per file.

    Args:
      metadata_value_pairs: iterable of (ExportedMetadata, StatEntry) tuples.

    Yields:
      One ExportedFile per non-registry StatEntry.
    """
    file_pairs = self._RemoveRegistryKeys(metadata_value_pairs)
    for batch in collection.Batch(file_pairs, self._BATCH_SIZE):

      if self.options.export_files_contents:
        pathspec_by_client_path = {}
        for metadata, stat_entry in batch:
          # TODO(user): Deprecate client_urn in ExportedMetadata in favor of
          # client_id (to be added).
          client_id = metadata.client_urn.Basename()
          client_path = db.ClientPath.FromPathSpec(client_id,
                                                   stat_entry.pathspec)
          pathspec_by_client_path[client_path] = stat_entry.pathspec

        # Collect the streamed chunks of every file under its pathspec.
        chunks_by_pathspec = {}
        chunk_stream = file_store.StreamFilesChunks(
            pathspec_by_client_path, max_size=self.MAX_CONTENT_SIZE)
        for chunk in chunk_stream:
          chunk_pathspec = pathspec_by_client_path[chunk.client_path]
          chunks_by_pathspec.setdefault(chunk_pathspec, []).append(chunk.data)

      for metadata, stat_entry in batch:
        exported_file = self._CreateExportedFile(metadata, stat_entry)

        if self.options.export_files_contents:
          try:
            chunks = chunks_by_pathspec[stat_entry.pathspec]
            exported_file.content = b"".join(chunks)[:self.MAX_CONTENT_SIZE]
            exported_file.content_sha256 = hashlib.sha256(
                exported_file.content).hexdigest()
          except KeyError:
            # No data was streamed for this file; export it without content.
            pass

        yield exported_file

  def BatchConvert(self, metadata_value_pairs, token=None):
    """Converts a batch of StatEntry values to ExportedFile values at once.

    Dispatches to the relational or the legacy implementation depending on
    data_store.RelationalDBEnabled().

    Args:
      metadata_value_pairs: a list or a generator of tuples (metadata, value),
        where metadata is ExportedMetadata to be used for conversion and value
        is a StatEntry to be converted.
      token: Security token; only used by the legacy implementation.

    Yields:
      Resulting ExportedFile values. Yielding nothing is a valid outcome and
      means that conversion wasn't possible.
    """
    if not data_store.RelationalDBEnabled():
      exported_files = self._BatchConvertLegacy(
          metadata_value_pairs, token=token)
    else:
      exported_files = self._BatchConvertRelational(metadata_value_pairs)

    for exported_file in exported_files:
      yield exported_file


class StatEntryToExportedRegistryKeyConverter(ExportConverter):
  """Converts registry StatEntry values to ExportedRegistryKey."""

  input_rdf_type = rdf_client_fs.StatEntry

  def Convert(self, metadata, stat_entry, token=None):
    """Converts StatEntry to ExportedRegistryKey.

    Produces nothing if the StatEntry corresponds to a file and not to a
    registry entry.

    Args:
      metadata: ExportedMetadata to be used for conversion.
      stat_entry: StatEntry to be converted.
      token: Security token (unused).

    Returns:
      List with the resulting ExportedRegistryKey, or an empty list if the
      StatEntry does not have the REGISTRY path type.
    """
    registry_type = rdf_paths.PathSpec.PathType.REGISTRY
    if stat_entry.pathspec.pathtype != registry_type:
      return []

    key_urn = stat_entry.AFF4Path(metadata.client_urn)
    exported_key = ExportedRegistryKey(
        metadata=metadata, urn=key_urn, last_modified=stat_entry.st_mtime)

    has_type = stat_entry.HasField("registry_type")
    if has_type and stat_entry.HasField("registry_data"):
      exported_key.type = stat_entry.registry_type

      # The registry value may be of an arbitrary type, while the exported
      # field holds `bytes`. Bytes are passed through untouched; anything else
      # is stringified into a human-readable form and UTF-8 encoded.
      raw_value = stat_entry.registry_data.GetValue()
      if not isinstance(raw_value, bytes):
        raw_value = str(raw_value).encode("utf-8")
      exported_key.data = raw_value

    return [exported_key]


class NetworkConnectionToExportedNetworkConnectionConverter(ExportConverter):
  """Converts NetworkConnection to ExportedNetworkConnection."""

  input_rdf_type = rdf_client_network.NetworkConnection

  def Convert(self, metadata, conn, token=None):
    """Converts NetworkConnection to ExportedNetworkConnection.

    Args:
      metadata: ExportedMetadata to be used for conversion.
      conn: NetworkConnection to be converted.
      token: Security token (unused).

    Returns:
      Single-element list with the resulting ExportedNetworkConnection.
    """
    exported_connection = ExportedNetworkConnection(
        metadata=metadata,
        family=conn.family,
        type=conn.type,
        local_address=conn.local_address,
        remote_address=conn.remote_address,
        state=conn.state,
        pid=conn.pid,
        ctime=conn.ctime)

    return [exported_connection]


class ProcessToExportedProcessConverter(ExportConverter):
  """Converts Process to ExportedProcess."""

  input_rdf_type = rdf_client.Process

  def Convert(self, metadata, process, token=None):
    """Converts Process to ExportedProcess.

    Args:
      metadata: ExportedMetadata to be used for conversion.
      process: Process to be converted.
      token: Security token (unused).

    Returns:
      Single-element list with the resulting ExportedProcess.
    """
    # The command line arguments are flattened into one space-joined string.
    flat_cmdline = " ".join(process.cmdline)

    exported_process = ExportedProcess(
        metadata=metadata,
        pid=process.pid,
        ppid=process.ppid,
        name=process.name,
        exe=process.exe,
        cmdline=flat_cmdline,
        ctime=process.ctime,
        real_uid=process.real_uid,
        effective_uid=process.effective_uid,
        saved_uid=process.saved_uid,
        real_gid=process.real_gid,
        effective_gid=process.effective_gid,
        saved_gid=process.saved_gid,
        username=process.username,
        terminal=process.terminal,
        status=process.status,
        nice=process.nice,
        cwd=process.cwd,
        num_threads=process.num_threads,
        user_cpu_time=process.user_cpu_time,
        system_cpu_time=process.system_cpu_time,
        cpu_percent=process.cpu_percent,
        rss_size=process.RSS_size,
        vms_size=process.VMS_size,
        memory_percent=process.memory_percent)

    return [exported_process]


class ProcessToExportedNetworkConnectionConverter(ExportConverter):
  """Converts Process to ExportedNetworkConnection."""

  input_rdf_type = rdf_client.Process

  def Convert(self, metadata, process, token=None):
    """Converts every connection of a Process to ExportedNetworkConnection.

    Args:
      metadata: ExportedMetadata shared by all resulting values.
      process: Process whose connections are converted.
      token: Security token.

    Returns:
      Generator of ExportedNetworkConnection values.
    """
    delegate = NetworkConnectionToExportedNetworkConnectionConverter(
        options=self.options)
    connection_pairs = [(metadata, conn) for conn in process.connections]
    return delegate.BatchConvert(connection_pairs, token=token)


class ProcessToExportedOpenFileConverter(ExportConverter):
  """Converts Process to ExportedOpenFile."""

  input_rdf_type = rdf_client.Process

  def Convert(self, metadata, process, token=None):
    """Yields one ExportedOpenFile per entry in the process's open_files.

    Args:
      metadata: ExportedMetadata shared by all resulting values.
      process: Process whose open files are exported.
      token: Security token (unused).

    Yields:
      ExportedOpenFile values carrying the process pid and the file path.
    """
    for open_path in process.open_files:
      exported_open_file = ExportedOpenFile(
          metadata=metadata, pid=process.pid, path=open_path)
      yield exported_open_file


class InterfaceToExportedNetworkInterfaceConverter(ExportConverter):
  """Converts Interface to ExportedNetworkInterface."""

  input_rdf_type = rdf_client_network.Interface

  def Convert(self, metadata, interface, token=None):
    """Converts Interface to ExportedNetworkInterfaces.

    Args:
      metadata: ExportedMetadata to be used for conversion.
      interface: Interface to be converted.
      token: Security token (unused).

    Yields:
      A single ExportedNetworkInterface whose IPv4 and IPv6 addresses are
      space-joined strings.

    Raises:
      ValueError: if an address is neither INET nor INET6.
    """
    inet_addresses = []
    inet6_addresses = []
    for address in interface.addresses:
      family = address.address_type
      if family == address.Family.INET:
        inet_addresses.append(address.human_readable_address)
        continue
      if family == address.Family.INET6:
        inet6_addresses.append(address.human_readable_address)
        continue
      raise ValueError("Invalid address type: %s" % address.address_type)

    exported_interface = ExportedNetworkInterface(
        metadata=metadata,
        ifname=interface.ifname,
        ip4_addresses=" ".join(inet_addresses),
        ip6_addresses=" ".join(inet6_addresses))

    # The MAC address is only exported when the interface has one.
    if interface.mac_address:
      exported_interface.mac_address = (
          interface.mac_address.human_readable_address)

    yield exported_interface


class DNSClientConfigurationToExportedDNSClientConfiguration(ExportConverter):
  """Converts DNSClientConfiguration to ExportedDNSClientConfiguration."""

  input_rdf_type = rdf_client_network.DNSClientConfiguration

  def Convert(self, metadata, config, token=None):
    """Converts DNSClientConfiguration to ExportedDNSClientConfiguration.

    Args:
      metadata: ExportedMetadata to be used for conversion.
      config: DNSClientConfiguration to be converted.
      token: Security token (unused).

    Yields:
      A single ExportedDNSClientConfiguration with servers and suffixes
      flattened into space-joined strings.
    """
    servers = " ".join(config.dns_server)
    suffixes = " ".join(config.dns_suffix)
    yield ExportedDNSClientConfiguration(
        metadata=metadata, dns_servers=servers, dns_suffixes=suffixes)


class ClientSummaryToExportedNetworkInterfaceConverter(
    InterfaceToExportedNetworkInterfaceConverter):
  """Converts the interfaces of a ClientSummary to ExportedNetworkInterface."""

  input_rdf_type = rdf_client.ClientSummary

  def Convert(self, metadata, client_summary, token=None):
    """Converts ClientSummary to ExportedNetworkInterfaces.

    Args:
      metadata: ExportedMetadata shared by all resulting values.
      client_summary: ClientSummary whose interfaces are converted.
      token: Security token.

    Yields:
      The values produced by the parent Interface converter for every
      interface of the client summary.
    """
    parent = super(ClientSummaryToExportedNetworkInterfaceConverter, self)
    for interface in client_summary.interfaces:
      # Iterate over the parent's generator instead of calling its `.next()`
      # method: generators only have `.next()` on Python 2, so that call
      # raises AttributeError on Python 3.
      for result in parent.Convert(metadata, interface, token=token):
        yield result


class ClientSummaryToExportedClientConverter(ExportConverter):
  """Converts ClientSummary to an ExportedClient carrying only metadata."""

  input_rdf_type = rdf_client.ClientSummary

  def Convert(self, metadata, unused_client_summary, token=None):
    """Returns a single-element list with an ExportedClient for metadata."""
    exported_client = ExportedClient(metadata=metadata)
    return [exported_client]


class BufferReferenceToExportedMatchConverter(ExportConverter):
  """Export converter for BufferReference instances."""

  input_rdf_type = rdf_client.BufferReference

  def Convert(self, metadata, buffer_reference, token=None):
    """Yields a single ExportedMatch built from the buffer reference.

    Args:
      metadata: ExportedMetadata to attach; its client_urn is used to build
        the URN of the matched file.
      buffer_reference: BufferReference to be converted.
      token: Security token (unused).

    Yields:
      One ExportedMatch.
    """
    match_urn = buffer_reference.pathspec.AFF4Path(metadata.client_urn)
    exported_match = ExportedMatch(
        metadata=metadata,
        offset=buffer_reference.offset,
        length=buffer_reference.length,
        data=buffer_reference.data,
        urn=match_urn)
    yield exported_match


class FileFinderResultConverter(StatEntryToExportedFileConverter):
  """Export converter for FileFinderResult instances."""

  input_rdf_type = rdf_file_finder.FileFinderResult

  def __init__(self, *args, **kwargs):
    """Initializes the converter; arguments go to the parent constructor."""
    super(FileFinderResultConverter, self).__init__(*args, **kwargs)
    # The hash is already part of the FileFinderResult, so the file only has
    # to be opened when its contents are going to be exported.
    export_contents = self.options.export_files_contents
    self.open_file_for_read = export_contents

  def _SeparateTypes(self, metadata_value_pairs):
    """Separates files, registry keys and grep matches.

    Args:
      metadata_value_pairs: iterable of (ExportedMetadata, FileFinderResult).

    Returns:
      Tuple (registry_pairs, file_pairs, match_pairs). Registry pairs hold the
      StatEntry, file pairs hold the whole FileFinderResult, and match pairs
      hold one entry per match of every result.
    """
    registry_type = rdf_paths.PathSpec.PathType.REGISTRY

    registries, files, matches = [], [], []
    for metadata, ff_result in metadata_value_pairs:
      if ff_result.stat_entry.pathspec.pathtype == registry_type:
        registries.append((metadata, ff_result.stat_entry))
      else:
        files.append((metadata, ff_result))

      # Matches are collected for both files and registry keys.
      for match in ff_result.matches:
        matches.append((metadata, match))

    return registries, files, matches

  def BatchConvert(self, metadata_value_pairs, token=None):
    """Convert FileFinder results.

    FileFinderResult objects have 3 types of results that need to be handled
    separately. Files, registry keys, and grep matches. The file results are
    similar to statentry exports, and share some code, but different because we
    already have the hash available without having to go back to the database to
    retrieve it from the aff4 object.

    Args:
      metadata_value_pairs: array of ExportedMetadata and rdfvalue tuples.
      token: ACLToken

    Yields:
      ExportedFile, ExportedRegistryKey, or ExportedMatch
    """
    if data_store.RelationalDBEnabled():
      # The relational implementation forwards the token to the registry-key
      # and grep-match conversions, so it must be passed on here as well (the
      # legacy branch below already does so).
      result_generator = self._BatchConvertRelational(
          metadata_value_pairs, token=token)
    else:
      result_generator = self._BatchConvertLegacy(
          metadata_value_pairs, token=token)

    for r in result_generator:
      yield r

  def _BatchConvertLegacy(self, metadata_value_pairs, token=None):
    """Converts FileFinder results using AFF4 for file contents.

    Args:
      metadata_value_pairs: iterable of (ExportedMetadata, FileFinderResult).
      token: Security token.

    Yields:
      ExportedFile values first, then the conversions of the registry keys,
      then the conversions of the grep matches.
    """
    registry_pairs, file_pairs, match_pairs = self._SeparateTypes(
        metadata_value_pairs)

    # Files come first. They are only opened if contents are exported.
    if self.options.export_files_contents:
      stat_pairs = [(metadata, ff_result.stat_entry)
                    for metadata, ff_result in file_pairs]
      opened_by_urn = self._OpenFilesForRead(stat_pairs, token=token)
    else:
      opened_by_urn = None

    for metadata, ff_result in file_pairs:
      exported_file = self._CreateExportedFile(metadata, ff_result.stat_entry)

      # FileFinderResult has hashes in "hash_entry" attribute which is not
      # passed to ConvertValuesWithMetadata call. We have to process these
      # explicitly here.
      self.ParseFileHash(ff_result.hash_entry, exported_file)

      if self.options.export_files_contents:
        file_urn = ff_result.stat_entry.AFF4Path(metadata.client_urn)
        try:
          self._ExportFileContent(opened_by_urn[file_urn], exported_file)
        except KeyError:
          logging.warning("Couldn't open %s for export", file_urn)

      yield exported_file

    # Registry keys are exported next, followed by the grep matches.
    for remaining_pairs in (registry_pairs, match_pairs):
      converted = ConvertValuesWithMetadata(
          remaining_pairs, token=token, options=self.options)
      for converted_value in converted:
        yield converted_value

  _BATCH_SIZE = 5000

  def _BatchConvertRelational(self, metadata_value_pairs, token=None):
    """Converts FileFinder results using the file store for file contents.

    File results are processed in batches of _BATCH_SIZE. When the
    export_files_contents option is set, the data of each batch is streamed
    from the file store and truncated to MAX_CONTENT_SIZE bytes per file.

    Args:
      metadata_value_pairs: iterable of (ExportedMetadata, FileFinderResult).
      token: Security token forwarded to the registry and match conversions.

    Yields:
      ExportedFile values first, then the conversions of the registry keys,
      then the conversions of the grep matches.
    """
    registry_pairs, file_pairs, match_pairs = self._SeparateTypes(
        metadata_value_pairs)

    for batch in collection.Batch(file_pairs, self._BATCH_SIZE):

      if self.options.export_files_contents:
        pathspec_by_client_path = {}
        for metadata, ff_result in batch:
          # TODO(user): Deprecate client_urn in ExportedMetadata in favor of
          # client_id (to be added).
          client_id = metadata.client_urn.Basename()
          file_pathspec = ff_result.stat_entry.pathspec
          client_path = db.ClientPath.FromPathSpec(client_id, file_pathspec)
          pathspec_by_client_path[client_path] = ff_result.stat_entry.pathspec

        # Collect the streamed chunks of every file under its pathspec.
        chunks_by_pathspec = {}
        chunk_stream = file_store.StreamFilesChunks(
            pathspec_by_client_path, max_size=self.MAX_CONTENT_SIZE)
        for chunk in chunk_stream:
          chunk_pathspec = pathspec_by_client_path[chunk.client_path]
          chunks_by_pathspec.setdefault(chunk_pathspec, []).append(chunk.data)

      for metadata, ff_result in batch:
        exported_file = self._CreateExportedFile(metadata,
                                                 ff_result.stat_entry)

        # FileFinderResult has hashes in "hash_entry" attribute which is not
        # passed to ConvertValuesWithMetadata call. We have to process these
        # explicitly here.
        self.ParseFileHash(ff_result.hash_entry, exported_file)

        if self.options.export_files_contents:
          try:
            chunks = chunks_by_pathspec[ff_result.stat_entry.pathspec]
            exported_file.content = b"".join(chunks)[:self.MAX_CONTENT_SIZE]
            exported_file.content_sha256 = hashlib.sha256(
                exported_file.content).hexdigest()
          except KeyError:
            # No data was streamed for this file; export it without content.
            pass

        yield exported_file

    # Registry keys are exported next, followed by the grep matches.
    for remaining_pairs in (registry_pairs, match_pairs):
      converted = ConvertValuesWithMetadata(
          remaining_pairs, token=token, options=self.options)
      for converted_value in converted:
        yield converted_value

  def Convert(self, metadata, result, token=None):
    """Converts a single FileFinderResult by delegating to BatchConvert."""
    single_pair_batch = [(metadata, result)]
    return self.BatchConvert(single_pair_batch, token=token)


class RDFURNConverter(ExportConverter):
  """Follows RDFURN and converts its target object into a set of RDFValues.

  Note: This is DEPRECATED due to REL_DB and URN-less world migration.

  TODO(user): remove this as soon as REL_DB becomes the main implementation
  and URNs are gone.
  """

  input_rdf_type = rdfvalue.RDFURN

  def Convert(self, metadata, stat_entry, token=None):
    """Converts a single RDFURN by delegating to BatchConvert.

    Args:
      metadata: Metadata to be used for conversion.
      stat_entry: The RDFURN to follow (the name is historical).
      token: Security token.

    Returns:
      List of converted values (possibly empty).
    """
    return self.BatchConvert([(metadata, stat_entry)], token=token)

  def BatchConvert(self, metadata_value_pairs, token=None):
    """Opens the objects behind a batch of RDFURNs and converts them.

    Values that are not RDFURN instances are ignored.

    Args:
      metadata_value_pairs: Iterable of (metadata, RDFURN) tuples.
      token: Security token.

    Returns:
      List of converted values. Value types without a suitable converter are
      logged at debug level and skipped; an empty list is a valid result.
    """
    # If the same URN occurs more than once, the last metadata wins.
    urns_dict = {}
    for metadata, value in metadata_value_pairs:
      if isinstance(value, rdfvalue.RDFURN):
        urns_dict[value] = metadata

    fds = aff4.FACTORY.MultiOpen(iterkeys(urns_dict), mode="r", token=token)
    batch = [(urns_dict[fd.urn], fd) for fd in fds]

    # ConvertValuesWithMetadata is a generator: NoConverterFound is raised
    # while it is being iterated, not when it is called. It therefore has to
    # be consumed inside the try block for the handler to have any effect.
    # Results produced before the error is raised are kept.
    results = []
    try:
      for result in ConvertValuesWithMetadata(batch, token=token):
        results.append(result)
    except NoConverterFound as e:
      logging.debug(e)

    return results


class CollectionConverterBase(ExportConverter):
  """Base class for converters that export every item of a collection.

  Subclasses set `input_rdf_type` to the collection class they handle.
  """

  input_rdf_type = None

  # Number of collection items passed to ConvertValues at a time.
  BATCH_SIZE = 1000

  def Convert(self, metadata, aff4_collection, token=None):
    """Converts all items of the given collection, batch by batch.

    Args:
      metadata: Metadata to be used for every item of the collection.
      aff4_collection: Iterable collection of values to convert.
      token: Security token.

    Yields:
      Converted values.
    """
    # The guard has to test the argument; the name `collection` refers to the
    # imported utility module, which is always truthy. An identity check is
    # used so that no length/truthiness hook of the collection is triggered;
    # empty collections simply produce no batches below.
    if aff4_collection is None:
      return

    for batch in collection.Batch(aff4_collection, self.BATCH_SIZE):
      converted_batch = ConvertValues(
          metadata, batch, token=token, options=self.options)
      for v in converted_batch:
        yield v


class GrrMessageCollectionConverter(CollectionConverterBase):
  """Collection converter registered for GrrMessageCollection inputs."""
  input_rdf_type = sequential_collection.GrrMessageCollection


class HuntResultCollectionConverter(CollectionConverterBase):
  """Collection converter registered for HuntResultCollection inputs."""
  input_rdf_type = hunt_results.HuntResultCollection


class FlowResultCollectionConverter(CollectionConverterBase):
  """Collection converter registered for FlowResultCollection inputs."""
  input_rdf_type = flow.FlowResultCollection


class VFSFileToExportedFileConverter(ExportConverter):
  """Converts VFSFile objects into ExportedFile values."""

  input_rdf_type = aff4_grr.VFSFile

  def Convert(self, metadata, vfs_file, token=None):
    """Builds an ExportedFile from the STAT attribute of a VFSFile.

    Args:
      metadata: Metadata to be attached to the exported value.
      vfs_file: VFSFile object to be converted.
      token: Security token (unused here).

    Returns:
      A list with a single ExportedFile, or an empty list if the file has no
      stat entry.
    """
    stat = vfs_file.Get(vfs_file.Schema.STAT)
    if not stat:
      return []

    # Stat fields that are copied verbatim under the same name.
    copied_fields = ("st_mode", "st_ino", "st_dev", "st_nlink", "st_uid",
                     "st_gid", "st_size", "st_atime", "st_mtime", "st_ctime",
                     "st_blocks", "st_blksize", "st_rdev", "symlink")
    exported_kwargs = {}
    for field_name in copied_fields:
      exported_kwargs[field_name] = getattr(stat, field_name)

    exported = ExportedFile(
        metadata=metadata,
        urn=stat.AFF4Path(metadata.client_urn),
        basename=stat.pathspec.Basename(),
        **exported_kwargs)

    file_hash = data_store_utils.GetFileHashEntry(vfs_file)
    if file_hash:
      StatEntryToExportedFileConverter.ParseFileHash(file_hash, exported)

    return [exported]


class RDFBytesToExportedBytesConverter(ExportConverter):
  """Converts RDFBytes values into ExportedBytes values."""

  input_rdf_type = rdfvalue.RDFBytes

  def Convert(self, metadata, data, token=None):
    """Wraps the serialized bytes and their length into an ExportedBytes.

    Args:
      metadata: Metadata to be attached to the exported value.
      data: RDFBytes value to be converted.
      token: Security token (unused here).

    Returns:
      A list with a single ExportedBytes value.
    """
    serialized = data.SerializeToString()
    exported = ExportedBytes(
        metadata=metadata, data=serialized, length=len(data))
    return [exported]


class RDFStringToExportedStringConverter(ExportConverter):
  """Converts RDFString values into ExportedString values."""

  input_rdf_type = rdfvalue.RDFString

  def Convert(self, metadata, data, token=None):
    """Wraps the serialized string into an ExportedString.

    Args:
      metadata: Metadata to be attached to the exported value.
      data: RDFString value to be converted.
      token: Security token (unused here).

    Returns:
      A list with a single ExportedString value.
    """
    serialized = data.SerializeToString()
    exported = ExportedString(metadata=metadata, data=serialized)
    return [exported]


class DictToExportedDictItemsConverter(ExportConverter):
  """Export converter that converts Dict to ExportedDictItems."""

  input_rdf_type = rdf_protodict.Dict

  def _IterateDict(self, d, key=""):
    """Flattens a nested structure into (key, value) pairs.

    Sequence items are addressed as "key[index]" (sets are sorted first so
    that indices are deterministic), mapping items as "key.subkey".

    Args:
      d: A list, tuple, set, dict, rdf_protodict.Dict or a leaf value.
      key: Flattened key prefix accumulated so far.

    Yields:
      (flattened_key, leaf_value) tuples.
    """
    if isinstance(d, (list, tuple, set)):
      items = sorted(d) if isinstance(d, set) else d
      for index, item in enumerate(items):
        next_key = "%s[%d]" % (key, index)
        for pair in self._IterateDict(item, key=next_key):
          yield pair
    elif isinstance(d, (dict, rdf_protodict.Dict)):
      for k in sorted(d):
        # Fetch the value with the original key before stringifying it: the
        # stringified key is not necessarily present in the mapping (e.g. for
        # non-string keys), which used to raise KeyError.
        value = d[k]
        k = utils.SmartStr(k)

        if not key:
          next_key = k
        else:
          next_key = key + "." + k

        for pair in self._IterateDict(value, key=next_key):
          yield pair
    else:
      yield key, d

  def Convert(self, metadata, data, token=None):
    """Converts a Dict into a list of ExportedDictItem values.

    Args:
      metadata: Metadata to be attached to every exported item.
      data: rdf_protodict.Dict to be converted.
      token: Security token (unused here).

    Returns:
      List of ExportedDictItem values, one per leaf of the dictionary.
    """
    return [
        ExportedDictItem(metadata=metadata, key=k, value=utils.SmartStr(v))
        for k, v in self._IterateDict(data.ToDict())
    ]


class GrrMessageConverter(ExportConverter):
  """Converts GrrMessage's payload into a set of RDFValues.

  GrrMessageConverter converts given GrrMessages to a set of exportable
  RDFValues. It looks at the payload of every message and applies necessary
  converters to produce the resulting RDFValues.

  Usually, when a value is converted via one of the ExportConverter classes,
  metadata (ExportedMetadata object describing the client, session id, etc)
  are provided by the caller. But when converting GrrMessages, the caller can't
  provide any reasonable metadata. In order to understand where the messages
  are coming from, one actually has to inspect the messages source and this
  is done by GrrMessageConverter and not by the caller.

  Although ExportedMetadata should still be provided for the conversion to
  happen, only its "source_urn" and "annotations" will be used. All other
  metadata will be fetched from the client object pointed to by
  GrrMessage.source.
  """

  input_rdf_type = rdf_flows.GrrMessage

  def __init__(self, *args, **kw):
    super(GrrMessageConverter, self).__init__(*args, **kw)
    # Per-instance cache of client metadata, keyed by client URN, so that
    # repeated conversions do not refetch metadata for the same client.
    self.cached_metadata = {}

  def Convert(self, metadata, grr_message, token=None):
    """Converts GrrMessage into a set of RDFValues.

    Args:
      metadata: ExportedMetadata to be used for conversion.
      grr_message: GrrMessage to be converted.
      token: Security token.

    Returns:
      List or generator with resulting RDFValues.
    """
    return self.BatchConvert([(metadata, grr_message)], token=token)

  def BatchConvert(self, metadata_value_pairs, token=None):
    """Converts a batch of GrrMessages into a set of RDFValues at once.

    Args:
      metadata_value_pairs: a list or a generator of tuples (metadata, value),
        where metadata is ExportedMetadata to be used for conversion and value
        is a GrrMessage to be converted.
      token: Security token.

    Returns:
      Resulting RDFValues. Empty list is a valid result and means that
      conversion wasn't possible.
    """

    # Group messages by source (i.e. by client urn).
    msg_dict = {}
    for metadata, msg in metadata_value_pairs:
      msg_dict.setdefault(msg.source, []).append((metadata, msg))

    metadata_objects = []
    metadata_to_fetch = []

    # Open the clients we don't have metadata for and fetch metadata.
    for client_urn in msg_dict:
      try:
        metadata_objects.append(self.cached_metadata[client_urn])
      except KeyError:
        metadata_to_fetch.append(client_urn)

    if metadata_to_fetch:
      if data_store.RelationalDBEnabled():
        # The relational database is addressed by client id, which is taken
        # from the last component of the client URN.
        client_ids = set(urn.Basename() for urn in metadata_to_fetch)
        infos = data_store.REL_DB.MultiReadClientFullInfo(client_ids)

        fetched_metadata = [
            GetMetadata(client_id, info) for client_id, info in infos.items()
        ]
      else:
        client_fds = aff4.FACTORY.MultiOpen(
            metadata_to_fetch, mode="r", token=token)

        fetched_metadata = [
            GetMetadataLegacy(client_fd, token=token)
            for client_fd in client_fds
        ]

      # Remember freshly fetched metadata for subsequent calls.
      for metadata in fetched_metadata:
        self.cached_metadata[metadata.client_urn] = metadata
      metadata_objects.extend(fetched_metadata)

    data_by_type = {}
    for metadata in metadata_objects:
      # NOTE: the KeyError handler below covers the whole loop body, not only
      # the msg_dict lookup; a KeyError raised anywhere inside silently skips
      # the remaining messages of this client.
      try:
        for original_metadata, message in msg_dict[metadata.client_urn]:
          # Get source_urn and annotations from the original metadata
          # provided and original_timestamp from the payload age.
          new_metadata = ExportedMetadata(metadata)
          new_metadata.source_urn = original_metadata.source_urn
          new_metadata.annotations = original_metadata.annotations
          new_metadata.original_timestamp = message.payload.age
          cls_name = message.payload.__class__.__name__

          # Create a dict of values for conversion keyed by type, so we can
          # apply the right converters to the right object types
          if cls_name not in data_by_type:
            converters_classes = ExportConverter.GetConvertersByValue(
                message.payload)
            data_by_type[cls_name] = {
                "converters": [cls(self.options) for cls in converters_classes],
                "batch_data": [(new_metadata, message.payload)]
            }
          else:
            data_by_type[cls_name]["batch_data"].append(
                (new_metadata, message.payload))

      except KeyError:
        pass

    # Run all converters against all objects of the relevant type
    converted_batch = []
    for dataset in itervalues(data_by_type):
      for converter in dataset["converters"]:
        converted_batch.extend(
            converter.BatchConvert(dataset["batch_data"], token=token))

    return converted_batch


class FileStoreHashConverter(ExportConverter):
  """Converts FileStoreHash values into ExportedFileStoreHash values."""

  input_rdf_type = filestore.FileStoreHash

  def Convert(self, metadata, stat_entry, token=None):
    """Convert a single FileStoreHash.

    Args:
      metadata: Metadata to be used for conversion.
      stat_entry: The FileStoreHash to convert (the name is historical).
      token: Security token.

    Returns:
      List of ExportedFileStoreHash values.
    """

    return self.BatchConvert([(metadata, stat_entry)], token=token)

  def BatchConvert(self, metadata_value_pairs, token=None):
    """Convert batch of FileStoreHashs.

    Args:
      metadata_value_pairs: Iterable of (metadata, FileStoreHash) tuples.
      token: Security token.

    Returns:
      List of ExportedFileStoreHash values, one per (hash, client file) hit.
    """
    # The pairs are traversed twice below, so materialize them first;
    # otherwise a generator argument would be exhausted after the first pass
    # and the metadata lookup would fail.
    metadata_value_pairs = list(metadata_value_pairs)

    urns = [urn for _, urn in metadata_value_pairs]
    urns_dict = {urn: metadata for metadata, urn in metadata_value_pairs}

    results = []
    for hash_urn, client_files in filestore.HashFileStore.GetClientsForHashes(
        urns, token=token):
      for hit in client_files:
        # Copy the metadata and point it at the client owning the hit.
        metadata = ExportedMetadata(urns_dict[hash_urn])
        metadata.client_urn = rdfvalue.RDFURN(hit).Split(2)[0]

        result = ExportedFileStoreHash(
            metadata=metadata,
            hash=hash_urn.hash_value,
            fingerprint_type=hash_urn.fingerprint_type,
            hash_type=hash_urn.hash_type,
            target_urn=hit)
        results.append(result)

    return results


class CheckResultConverter(ExportConverter):
  """Converts CheckResult values into ExportedCheckResult values."""

  input_rdf_type = checks.CheckResult

  def Convert(self, metadata, checkresult, token=None):
    """Converts a single CheckResult.

    One ExportedCheckResult is produced per anomaly; a check result without
    the "anomaly" field produces a single value carrying only the check id.

    Args:
      metadata: ExportedMetadata to be used for conversion.
      checkresult: CheckResult to be converted.
      token: Security token.

    Yields:
      Resulting ExportedCheckResult values.
    """
    if not checkresult.HasField("anomaly"):
      yield ExportedCheckResult(
          metadata=metadata, check_id=checkresult.check_id)
      return

    # Optional fields copied as-is, and repeated fields that are collapsed
    # into one newline-separated string.
    plain_fields = ("symptom", "explanation", "generated_by")
    joined_fields = ("anomaly_reference_id", "finding")

    for anomaly in checkresult.anomaly:
      exported = ExportedAnomaly(
          type=anomaly.type,
          severity=anomaly.severity,
          confidence=anomaly.confidence)

      for field_name in plain_fields:
        field_value = getattr(anomaly, field_name)
        if field_value:
          setattr(exported, field_name, field_value)

      for field_name in joined_fields:
        field_value = getattr(anomaly, field_name)
        if field_value:
          setattr(exported, field_name, "\n".join(field_value))

      yield ExportedCheckResult(
          metadata=metadata, check_id=checkresult.check_id, anomaly=exported)


class ArtifactFilesDownloaderResultConverter(ExportConverter):
  """Converts ArtifactFilesDownloaderResult to its exported version."""

  input_rdf_type = flow_collectors.ArtifactFilesDownloaderResult

  def GetExportedResult(self,
                        original_result,
                        converter,
                        metadata=None,
                        token=None):
    """Runs the converter and returns its one and only result.

    Args:
      original_result: Value to be converted.
      converter: Converter instance to apply to the value.
      metadata: Metadata to use; a fresh ExportedMetadata is used if falsy.
      token: Security token.

    Returns:
      The single value produced by the converter.

    Raises:
      ExportError: if the converter produced zero or more than one value.
    """
    effective_metadata = metadata or ExportedMetadata()
    converted = list(
        converter.Convert(effective_metadata, original_result, token=token))

    if len(converted) == 0:
      raise ExportError("Got 0 exported result when a single one "
                        "was expected.")
    if len(converted) > 1:
      raise ExportError("Got > 1 exported results when a single "
                        "one was expected, seems like a logical bug.")

    return converted[0]

  def IsRegistryStatEntry(self, original_result):
    """Checks if given RDFValue is a registry StatEntry."""
    pathtype = original_result.pathspec.pathtype
    return pathtype == rdf_paths.PathSpec.PathType.REGISTRY

  def IsFileStatEntry(self, original_result):
    """Checks if given RDFValue is a file StatEntry."""
    file_pathtypes = [
        rdf_paths.PathSpec.PathType.OS, rdf_paths.PathSpec.PathType.TSK
    ]
    return original_result.pathspec.pathtype in file_pathtypes

  def BatchConvert(self, metadata_value_pairs, token=None):
    """Converts a batch of ArtifactFilesDownloaderResults.

    Args:
      metadata_value_pairs: Iterable of (metadata, value) tuples, where value
        is an ArtifactFilesDownloaderResult.
      token: Security token.

    Yields:
      ExportedArtifactFilesDownloaderResult values for results whose original
      value is a registry or file StatEntry, followed by whatever the regular
      export pipeline produces for all the original values.
    """
    all_pairs = list(metadata_value_pairs)

    # (exported result, downloaded file stat entry or None) tuples.
    pending = []
    for metadata, value in all_pairs:
      stat_entry = value.original_result
      if not isinstance(stat_entry, rdf_client_fs.StatEntry):
        continue

      if self.IsRegistryStatEntry(stat_entry):
        registry_key = self.GetExportedResult(
            stat_entry,
            StatEntryToExportedRegistryKeyConverter(),
            metadata=metadata,
            token=token)
        exported = ExportedArtifactFilesDownloaderResult(
            metadata=metadata, original_registry_key=registry_key)
      elif self.IsFileStatEntry(stat_entry):
        original_file = self.GetExportedResult(
            stat_entry,
            StatEntryToExportedFileConverter(),
            metadata=metadata,
            token=token)
        exported = ExportedArtifactFilesDownloaderResult(
            metadata=metadata, original_file=original_file)
      else:
        # TODO(user): if original_result is not a registry key or a file,
        # we should still somehow export the data, otherwise the user will get
        # an impression that there was nothing to export at all.
        continue

      if value.HasField("found_pathspec"):
        exported.found_path = value.found_pathspec.CollapsePath()

      if value.HasField("downloaded_file"):
        downloaded = value.downloaded_file
      else:
        downloaded = None

      pending.append((exported, downloaded))

    # Convert all downloaded files in one batch and index them by URN.
    downloaded_pairs = [
        (exported.metadata, downloaded)
        for exported, downloaded in pending
        if downloaded is not None
    ]
    file_converter = StatEntryToExportedFileConverter(options=self.options)
    exported_files_by_urn = {
        exported_file.urn: exported_file for exported_file in
        file_converter.BatchConvert(downloaded_pairs, token=token)
    }

    for exported, downloaded in pending:
      if downloaded:
        file_urn = downloaded.AFF4Path(exported.metadata.client_urn)
        if file_urn in exported_files_by_urn:
          exported.downloaded_file = exported_files_by_urn[file_urn]

      yield exported

    # Feed all original results into the export pipeline. There are 2 good
    # reasons to do this:
    # * Export output of ArtifactFilesDownloader flow will be similar to export
    #   output of other file-related flows. I.e. it will produce
    #   ExportedFile entries and ExportedRegistryKey entries and what not, but
    #   in addition it will also generate ExportedArtifactFilesDownloaderResult
    #   entries, that one can use to understand how and where file paths
    #   were detected and how file paths detection algorithm can be possibly
    #   improved.
    # * ExportedArtifactFilesDownloaderResult can only be generated if original
    #   value is a StatEntry. However, original value may be anything, and no
    #   matter what type it has, we want it in the export output.
    #
    # NOTE(review): options=None is passed here rather than self.options -
    # confirm that this is intentional.
    original_pairs = [(m, v.original_result) for m, v in all_pairs]
    original_exports = ConvertValuesWithMetadata(
        original_pairs, token=token, options=None)
    for original_export in original_exports:
      yield original_export

  def Convert(self, metadata, value, token=None):
    """Converts a single ArtifactFilesDownloaderResult."""
    single_batch = [(metadata, value)]
    for converted in self.BatchConvert(single_batch, token=token):
      yield converted


class SoftwarePackageConverter(ExportConverter):
  """Converter for rdf_client.SoftwarePackage structs."""

  input_rdf_type = rdf_client.SoftwarePackage

  # Maps every source install state to the exported state of the same name.
  _INSTALL_STATE_MAP = {
      getattr(rdf_client.SoftwarePackage.InstallState, state_name):
          getattr(ExportedSoftwarePackage.InstallState, state_name)
      for state_name in ("INSTALLED", "PENDING", "UNINSTALLED", "UNKNOWN")
  }

  def Convert(self, metadata, software_package, token=None):
    """Converts a single SoftwarePackage.

    Args:
      metadata: Metadata to be attached to the exported value.
      software_package: rdf_client.SoftwarePackage to be converted.
      token: Security token (unused here).

    Yields:
      A single ExportedSoftwarePackage.
    """
    exported_state = self._INSTALL_STATE_MAP[software_package.install_state]
    exported_package = ExportedSoftwarePackage(
        metadata=metadata,
        name=software_package.name,
        version=software_package.version,
        architecture=software_package.architecture,
        publisher=software_package.publisher,
        install_state=exported_state,
        description=software_package.description,
        installed_on=software_package.installed_on,
        installed_by=software_package.installed_by)
    yield exported_package


class SoftwarePackagesConverter(ExportConverter):
  """Converter for rdf_client.SoftwarePackages structs."""

  input_rdf_type = rdf_client.SoftwarePackages

  def Convert(self, metadata, software_packages, token=None):
    """Converts every package of a SoftwarePackages value.

    Args:
      metadata: Metadata to be attached to every exported package.
      software_packages: rdf_client.SoftwarePackages to be converted.
      token: Security token (not forwarded to the per-package converter).

    Yields:
      Values produced by SoftwarePackageConverter for each package.
    """
    package_converter = SoftwarePackageConverter(options=self.options)
    for package in software_packages.packages:
      for exported_package in package_converter.Convert(metadata, package):
        yield exported_package


class YaraProcessScanResponseConverter(ExportConverter):
  """Converts YaraProcessScanMatch values into exported matches."""

  input_rdf_type = rdf_memory.YaraProcessScanMatch

  def Convert(self, metadata, yara_match, token=None):
    """Convert a single YaraProcessScanMatch.

    Args:
      metadata: Metadata to be attached to every exported match.
      yara_match: rdf_memory.YaraProcessScanMatch to be converted.
      token: Security token.

    Yields:
      One ExportedYaraProcessScanMatch per distinct rule name, in order of
      first occurrence.
    """
    process_converter = ProcessToExportedProcessConverter(options=self.options)
    exported_processes = list(
        process_converter.Convert(
            ExportedMetadata(), yara_match.process, token=token))
    # The first converted value is used as the exported process.
    process = exported_processes[0]

    reported_rules = set()
    for match in yara_match.match:
      if match.rule_name not in reported_rules:
        reported_rules.add(match.rule_name)
        yield ExportedYaraProcessScanMatch(
            metadata=metadata,
            process=process,
            rule_name=match.rule_name,
            scan_time_us=yara_match.scan_time_us)


class OsqueryExportConverter(ExportConverter):
  """An export converter class for transforming osquery table values."""

  input_rdf_type = rdf_osquery.OsqueryTable

  # Generated RDF classes, keyed by their generated class name.
  _rdf_cls_cache = {}

  @classmethod
  def _RDFClass(cls, table):
    """Creates a dynamic RDF proto struct class for given osquery table.

    The class gets a `metadata` field, a `__query__` field and one string
    field per column of the table. Classes are cached by a name derived from
    the hash of the table query.

    Args:
      table: An osquery table for which the class is about to be generated.

    Returns:
      A class object corresponding to the given table.
    """
    rdf_cls_name = "OsqueryTable{}".format(hash(table.query))
    if rdf_cls_name in cls._rdf_cls_cache:
      return cls._rdf_cls_cache[rdf_cls_name]

    rdf_cls = compatibility.MakeType(rdf_cls_name,
                                     (rdf_structs.RDFProtoStruct,), {})

    metadata_descriptor = rdf_structs.ProtoEmbedded(
        name="metadata", field_number=1, nested=ExportedMetadata)
    rdf_cls.AddDescriptor(metadata_descriptor)

    query_descriptor = rdf_structs.ProtoString(
        name="__query__", field_number=2)
    rdf_cls.AddDescriptor(query_descriptor)

    # Field numbers 1 and 2 are taken above, so columns start at 3.
    for field_number, column in enumerate(table.header.columns, 3):
      # It is possible that RDF column is named "metadata". To avoid name clash
      # we must rename it to `__metadata__`.
      name = "__metadata__" if column.name == "metadata" else column.name
      rdf_cls.AddDescriptor(
          rdf_structs.ProtoString(name=name, field_number=field_number))

    cls._rdf_cls_cache[rdf_cls_name] = rdf_cls
    return rdf_cls

  def Convert(self, metadata, table, token=None):
    """Converts every row of an osquery table into a generated RDF struct.

    Args:
      metadata: Metadata to be attached to every exported row.
      table: rdf_osquery.OsqueryTable to be converted.
      token: Security token (unused).

    Yields:
      One instance of the table-specific RDF class per row.
    """
    del token  # Unused.
    precondition.AssertType(table, rdf_osquery.OsqueryTable)

    row_cls = self._RDFClass(table)

    for row in table.rows:
      exported_row = row_cls()
      exported_row.metadata = metadata
      exported_row.__query__ = table.query.strip()

      for column, value in zip(table.header.columns, row.values):
        # In order to avoid name clash, renaming the column might be required.
        if column.name == "metadata":
          attribute = "__metadata__"
        else:
          attribute = column.name
        setattr(exported_row, attribute, value)

      yield exported_row


def GetMetadata(client_id, client_full_info):
  """Builds ExportedMetadata object for a given client id and ClientFullInfo.

  Args:
    client_id: Client id; stored in the `client_urn` field of the metadata.
    client_full_info: ClientFullInfo object describing the client.

  Returns:
    ExportedMetadata object with metadata of the client. Snapshot-derived
    fields are only filled in if the client has a last snapshot.
  """

  metadata = ExportedMetadata()

  metadata.client_urn = client_id
  metadata.client_age = client_full_info.metadata.first_seen

  if client_full_info.HasField("last_snapshot"):
    last_snapshot = client_full_info.last_snapshot
    kb = last_snapshot.knowledge_base

    metadata.hostname = kb.fqdn
    metadata.os = kb.os
    metadata.uname = last_snapshot.Uname()
    metadata.os_release = last_snapshot.os_release
    metadata.os_version = last_snapshot.os_version
    metadata.usernames = ",".join(user.username for user in kb.users)

    # Reuse the addresses fetched for the emptiness check instead of calling
    # GetMacAddresses() a second time.
    addresses = last_snapshot.GetMacAddresses()
    if addresses:
      metadata.mac_address = addresses.pop()
    metadata.hardware_info = last_snapshot.hardware_info
    metadata.kernel_version = last_snapshot.kernel

    ci = last_snapshot.cloud_instance
    if ci is not None:
      if ci.cloud_type == ci.InstanceType.AMAZON:
        metadata.cloud_instance_type = metadata.CloudInstanceType.AMAZON
        metadata.cloud_instance_id = ci.amazon.instance_id
      elif ci.cloud_type == ci.InstanceType.GOOGLE:
        metadata.cloud_instance_type = metadata.CloudInstanceType.GOOGLE
        metadata.cloud_instance_id = ci.google.unique_id

  # Labels owned by "GRR" are system labels, everything else is a user label.
  system_labels = set()
  user_labels = set()
  for l in client_full_info.labels:
    if l.owner == "GRR":
      system_labels.add(l.name)
    else:
      user_labels.add(l.name)

  metadata.labels = ",".join(sorted(system_labels | user_labels))
  metadata.system_labels = ",".join(sorted(system_labels))
  metadata.user_labels = ",".join(sorted(user_labels))

  return metadata


def GetMetadataLegacy(client, token=None):
  """Builds ExportedMetadata object for a given client id.

  Note: This is a legacy aff4-only implementation.
  TODO(user): deprecate as soon as REL_DB migration is done.

  Args:
    client: RDFURN of a client or VFSGRRClient object itself.
    token: Security token.

  Returns:
    ExportedMetadata object with metadata of the client.
  """
  if isinstance(client, rdfvalue.RDFURN):
    client_fd = aff4.FACTORY.Open(client, mode="r", token=token)
  else:
    client_fd = client

  metadata = ExportedMetadata()

  metadata.client_urn = client_fd.urn
  metadata.client_age = client_fd.urn.age

  metadata.hostname = utils.SmartUnicode(
      client_fd.Get(client_fd.Schema.HOSTNAME, ""))

  metadata.os = utils.SmartUnicode(client_fd.Get(client_fd.Schema.SYSTEM, ""))

  metadata.uname = utils.SmartUnicode(client_fd.Get(client_fd.Schema.UNAME, ""))

  metadata.os_release = utils.SmartUnicode(
      client_fd.Get(client_fd.Schema.OS_RELEASE, ""))

  metadata.os_version = utils.SmartUnicode(
      client_fd.Get(client_fd.Schema.OS_VERSION, ""))

  # Export usernames as a comma-separated string, the same format that
  # GetMetadata produces, rather than the textual form of a Python list.
  kb = client_fd.Get(client_fd.Schema.KNOWLEDGE_BASE)
  usernames = ""
  if kb:
    usernames = ",".join(user.username for user in kb.users)
  metadata.usernames = utils.SmartUnicode(usernames)

  metadata.mac_address = utils.SmartUnicode(
      client_fd.Get(client_fd.Schema.MAC_ADDRESS, ""))

  # Labels owned by "GRR" are system labels, everything else is a user label.
  system_labels = set()
  user_labels = set()
  for l in client_fd.GetLabels():
    if l.owner == "GRR":
      system_labels.add(l.name)
    else:
      user_labels.add(l.name)

  metadata.labels = ",".join(sorted(system_labels | user_labels))

  metadata.system_labels = ",".join(sorted(system_labels))

  metadata.user_labels = ",".join(sorted(user_labels))

  metadata.hardware_info = client_fd.Get(client_fd.Schema.HARDWARE_INFO)

  metadata.kernel_version = client_fd.Get(client_fd.Schema.KERNEL)

  return metadata


def ConvertValuesWithMetadata(metadata_value_pairs, token=None, options=None):
  """Converts a set of RDFValues into a set of export-friendly RDFValues.

  Values are grouped by the name of their class; every group is handed to
  all converters registered for the first value of that group.

  Args:
    metadata_value_pairs: Tuples of (metadata, rdf_value), where metadata is an
      instance of ExportedMetadata and rdf_value is an RDFValue subclass
      instance to be exported.
    token: Security token.
    options: rdfvalue.ExportOptions instance that will be passed to
      ExportConverters.

  Yields:
    Converted values. Converted values may be of different types.

  Raises:
    NoConverterFound: in case no suitable converters were found for a value in
                      metadata_value_pairs. This error is only raised after
                      all values in metadata_value_pairs are attempted to be
                      converted. If there are multiple value types that could
                      not be converted because of the lack of corresponding
                      converters, only the last one will be specified in the
                      exception message.
  """
  last_error_message = None

  groups_by_type = collection.Group(
      metadata_value_pairs, lambda pair: pair[1].__class__.__name__)

  for pairs_of_type in itervalues(groups_by_type):
    # The first value of the group decides which converters are used.
    sample_value = pairs_of_type[0][1]
    converter_classes = ExportConverter.GetConvertersByValue(sample_value)

    if converter_classes:
      # Instantiate all converters up front, then run them one by one.
      instances = [converter_cls(options) for converter_cls in converter_classes]
      for instance in instances:
        for converted in instance.BatchConvert(pairs_of_type, token=token):
          yield converted
    else:
      last_error_message = "No converters found for value: %s" % str(
          sample_value)

  if last_error_message is not None:
    raise NoConverterFound(last_error_message)


def ConvertValues(default_metadata, values, token=None, options=None):
  """Converts a set of RDFValues into a set of export-friendly RDFValues.

  Every value is paired with the same metadata and the resulting pairs are
  handed to ConvertValuesWithMetadata.

  Args:
    default_metadata: export.ExportedMetadata instance with basic information
      about where the values come from. This metadata will be passed to
      exporters.
    values: Values to convert. They should be of the same type.
    token: Security token.
    options: rdfvalue.ExportOptions instance that will be passed to
      ExportConverters.

  Returns:
    Converted values. Converted values may be of different types
    (unlike the source values which are all of the same type). This is due to
    the fact that multiple ExportConverters may be applied to the same value
    thus generating multiple converted values of different types.

  Raises:
    NoConverterFound: in case no suitable converters were found for the values.
  """
  pairs = []
  for value in values:
    pairs.append((default_metadata, value))

  return ConvertValuesWithMetadata(pairs, token=token, options=options)
